Null Model of Graphs

Preliminaries

Load in the packages we need…

library(igraph)
library(ggraph)
library(cowplot)
library(tidyverse)

# create a custom function we will use to plot degree distributions...
plot_dd <- function(g) {
  d <- tibble(
  degree = seq(0, length(degree_distribution(g))-1, by = 1),
  rel_freq = degree_distribution(g)
)
  ggplot(d, aes(x= degree, y = rel_freq)) +
    geom_bar(stat = "identity") +
    xlab("Degree") +
    ylab("Relative Frequency") +
    scale_x_continuous(breaks = seq(0, length(degree_distribution(g))-1, by = 1)) +
    ggtitle(paste0( vcount(g), " nodes, ", ecount(g), " edges\nDeg Dist: (mean deg: ", round(mean(degree(g)),2), ", var deg: ", round(var(degree(g)), 2), ")\nGlobal Transitivity: ", round(transitivity(g, type = "global"), 4), "\nDiameter: ", diameter(g), "\nEdge Density: ", round(edge_density(g),4)))
}

Erdos-Renyi Random Networks

Erdos-Reyni simple random graphs can be created by specifying a number of nodes and either the total number of edges in the graph OR a probability of any given pair of nodes being connected.

# specify the number of nodes, n, and the total number of edges in the graph, m
g <-sample_gnm(n = 12, m = 12) # run this several times
plot(g)

# or

g <-sample_gnm(n = 12, m = 12) %>% plot()

g <- sample_gnm(n = 12, m = 66) %>% plot()

NOTE: A graph with 12 nodes can have up to (12 x 11)/2 = 66 total edges, thus the line of code above results in a graph with the 100% of dyads connected.

# specify the number of nodes and the probability of any two nodes being connected
g <- sample_gnp(n = 12, p = 0.182) %>% plot()

g <- sample_gnp(n = 12, p = 1) %>% plot()

# note that with p = 1, all of dyads are connected

Create and visualize 100 random graphs with the same properties using sample_gnm()

We first use the list() function to create empty lists to hold the simulated graphs and their properties…

g <- list()
deg <- list()
diam <- list()
plot <- list()
trans <- list()

n <- 12
m <- 12
for (i in 1:100){
  g[[i]] <- sample_gnm(n = n, m = m)
  diam[[i]] <- diameter(g[[i]])
  deg[[i]] <- mean(degree(g[[i]]))
  plot[[i]] <- ggraph(g[[i]], layout = layout_in_circle(g[[i]])) +
    geom_edge_link() +
    geom_node_point(size = 1, shape=21, fill="#A70042") +
    theme(legend.position = "none")
  trans[[i]] <- transitivity(g[[i]], type = "global")
  }

# draw a histogram of the distribution of diameters of the 100 graphs
hist(
  unlist(diam),
  breaks = seq(from = -0.5, to = 11.5, by = 1))

# calculate the expected mean degree across the 100 graphs
(exp_mean_deg <- (m/(n * (n-1)/2))) * (n - 1)
[1] 2
# calculate the observed mean degree across the 100 graphs
(calc_mean_deg <- mean(unlist(deg)))
[1] 2
# calculate the global transitivity (clustering coefficient) across the 100 graphs
(calc_mean_trans <- mean(unlist(trans)))
[1] 0.1457863
# use the `plot_grid()` function from {cowplot} to plot all graphs in the list "plot"
do.call("plot_grid", plot)

Create and visualize 100 random graphs with the same properties using sample_gnp()

g <- list()
deg <- list()
diam <- list()
plot <- list()
trans <- list()

n <- 12
p <- 0.182
for (i in 1:100){
  g[[i]] <- sample_gnp(n = n, p = p)
  deg[[i]] <- mean(degree(g[[i]]))
  diam[[i]] <- diameter(g[[i]])
  plot[[i]] <- ggraph(g[[i]], layout = layout_in_circle(g[[i]])) +
    geom_edge_link() +
    geom_node_point(size = 1, shape=21, fill="#A70042") +
    theme(legend.position = "none")
  trans[[i]] <- transitivity(g[[i]], type = "global")
}

hist(
  unlist(diam),
  breaks = seq(from = -0.5, to = 11.5, by = 1))

(exp_mean_deg <- p * (n - 1))
[1] 2.002
(calc_mean_deg <- mean(unlist(deg)))
[1] 2.065
(calc_mean_trans <- mean(unlist(trans)))
[1] 0.1431013
do.call("plot_grid", plot)

Important Point 1

For a large random graph, the degree distribution is Poisson, i.e., it is centered on the average degree and the variance is around the same as the average degree…

n <- 5000 # set a large number of nodes
average_degee <- 15 # try alternatives, e.g., 5, 25, 40
g <- sample_gnp(n = n, p = average_degee/(n - 1))

# use a custom function to plot the degree distribution
plot_dd(g)

Important Point 2

Random graphs become nearly fully connected even at low average degree…

n <- 5000 # graph size... try 500, 1000, 2000, 10000
plot <- list() # list to hold plots for each average degree
prop <- list() # list to hold proportion of nodes in largest component
for (i in 1:12) { # i holds average degree...
  g <- sample_gnp(n = n, p = i/(n - 1))
  d <- tibble(
    degree = (1:length(degree_distribution(g))-1),
    freq = degree_distribution(g))
  plot[[i]] <- ggplot(d, aes(x = degree, y = freq)) +
    geom_line() +
    ggtitle(paste0(n, " nodes\nmean deg ", i))
  prop[[i]] <- components(g)$csize[[1]]/n # proportion of nodes in largest component
  }

do.call("plot_grid", plot)

d <- tibble(
  average_degree = c(1:12),
  proportion = unlist(prop))

ggplot(d, aes(x = average_degree, y = proportion)) +
  geom_line() +
  xlab("Average Degree") +
  ylab("Proportion of Nodes\nin Largest Component") +
  scale_x_continuous(breaks = c(1:12))

Important Point 3

Connected components of random graphs are compact, i.e., the diameter of the largest component is small even for large networks…

n <- rep(c(100, 500, 1000, 2500, 5000, 7500, 10000, 15000, 20000, 30000), each = 5)
average_degree <- 12 # try different average degrees
diam <- list() # list to hold diameter of largest component
size <- list() # list to hold size of graph
prop <- list()

for (i in 1:length(n)) { # i network size...
  g <- sample_gnp(n = n[i], p = average_degree/(n[i] - 1))
  size[[i]] <- n[i]
  diam[[i]] <- diameter(g) # this command takes some time to run on large graphs!
  prop[[i]] <- components(g)$csize[[1]]/n[i] # proportion of nodes in largest component
  }

d <- tibble(
  n = unlist(size),
  diam = unlist(diam),
  prop = unlist(prop))

ggplot(d, aes(x = n, y = diam, alpha = 0.1)) +
  geom_jitter(height = 0.1) +
  xlab("Number of Nodes") +
  scale_y_continuous(breaks = seq(0, max(d$diam) + 1, by = 1)) +
  ylab("Diameter of Largest Component") +
  theme(legend.position = "none")

Small World Random Graphs

The small-world model of Watts and Strogatz begins with a ring of nodes where each node is connected to a specified number of neigbors on each side. This graph is a regular lattice. Then, each edge is rewired with probability p (i.e., with probability p an edge is dropped and reconnected between two random nodes).

g <- sample_smallworld(dim=1, size=30, nei=1, p=0)
plot(g, layout = layout_with_kk(g))

plot_dd(g)

g <- sample_smallworld(dim=1, size=30, nei=2, p=0)
plot(g, layout = layout_with_kk(g))

plot_dd(g)

g <- sample_smallworld(dim=1, size=30, nei=4, p=0)
plot(g, layout = layout_with_kk(g))

plot_dd(g)

g <- sample_smallworld(dim=1, size=30, nei=4, p=0.05)
plot(g, layout = layout_with_kk(g))

plot_dd(g)

g <- sample_smallworld(dim=1, size=30, nei=4, p=0.1)
plot(g, layout = layout_in_circle(g))

plot_dd(g)

g <- sample_smallworld(dim=1, size=30, nei=4, p=1)
plot(g, layout = layout_in_circle(g))

plot_dd(g)

g <- sample_smallworld(dim=1, size=100, nei=4, p=0)
plot(g, layout = layout_in_circle(g))

plot_dd(g)

g <- sample_smallworld(dim=1, size=100, nei=4, p=0.1)
plot(g, layout = layout_in_circle(g))

plot_dd(g)

g <- sample_smallworld(dim=1, size=100, nei=4, p=0.2)
plot(g, layout = layout_in_circle(g))

plot_dd(g)

When the rewiring probability is 1, random graphs created from the small world algorithm are very nearly equivalent to Erdos-Reyni random graphs…

g <- sample_smallworld(dim=1, size=100, nei=4, p=1) # this has a mean degree of 8
plot(g, layout = layout_in_circle(g))

plot_dd(g)

g <- sample_gnm(n = 100, m = 400) # corresponds to mean degree of 8 = m / ((n * (n -1))/2) * (n - 1)
plot(g, layout = layout_in_circle(g))

plot_dd(g)

g <- sample_gnp(n = 100, p = 0.0808) # corresponds to mean degree of 8 = p * (n -1)
plot(g, layout = layout_in_circle(g))

plot_dd(g)

Important Point 1

With appropriate rewiring probabilities, small world random graphs can have more realistic measures of transitivity (i.e., that are more like those of real world graphs) than random graphs with similar numbers of vertices and edges. For example, compare the transitivities across the following…

# Harry Potter graph
hp <- read_csv("data/hp.csv", col_names = TRUE)
Rows: 222 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
dbl (4): from, to, from_name, to_name

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
hp <- graph_from_data_frame(hp, directed = TRUE)
plot_dd(hp)

# similar small world graph
sm <- sample_smallworld(dim = 1, size = 64, nei = 4, p = 0.1) # try tweaking p... when p = 1, graph resembles random graph
plot_dd(sm)

# similar random graphs
sm <- sample_gnp(n = 64, p = 222/((64 * 63)/2))
plot_dd(sm)

sm <- sample_gnm(n = 64, m = 222)
plot_dd(sm)

Note, however, that the degree distributions and edge densities of both the small world and random graphs are poor matches to that of the “real world” Harry Potter graph… the degree distributions are unimodal, have much lower variance, and lack a long tail and the edge densities are much higher.

Scale-Free Random Graphs

Many real-world graphs have degree distributions where the modal degree is low and the distribution is long-tailed. These distributions generally follow a “power-low” where the frequency of nodes with a particular degree decays exponentially as degree increases. These are also known as “scale-free” networks. A random model of network formation that can produce these kinds of degree distributions is called “preferential attachment”, where as new nodes are added into a network, they are preferentially connected to existing nodes that already have links to other nodes. For example…

g <- sample_pa(n = 64, directed = FALSE)
plot.igraph(g, vertex.size = 5, vertex.label = NA)

plot_dd(g)